# RelativesAnalysis_09202015.R 
# This file replicates the analyses and figures in 'Incumbency Advantage and Family Success in Local Elections' that pertain to Barangay Captain relatives.

rm(list=ls())

library(stringr)
library(gtools)
library(plyr)
library(rdd)
library(rdrobust)
library(pscl)
library(stargazer)
library(xtable)
library(doBy)
library(dplyr)
library(multiwayvcov)
library(ggplot2)
library(grid)
library(gridExtra)
library(ggthemes)



# SET YOUR WD (THE DIRECTORY TO THE FOLDER R&P_09202015) HERE:
#setwd("~/Dropbox/R&P_09202015")


# Call Functions
source('functions/plot.RD.R')
source('functions/rdpoints.R')
source('functions/rdplot2.R')

# Load Relatives Data--Many observation per barangay
data <- read.csv("data/data_2ormore.csv")

# Load Incumbency Data--One observation per barangay
samp_inc <- read.csv("data/samp_inc.csv")


# ----- Option: Conditionality ----- #
# The analysis can be conducted conditional on relatives running or unconditionally. 
# Unconditional
data$propRel[is.na(data$propRel)] <- 0
data$avgVS[is.na(data$avgVS)] <- 0
data$percentRelWin[is.na(data$percentRelWin)] <- 0

# Conditional
# data$avgVS[data$avgVS==0] <- NA
# data$percentRelWin[data$percentRelWin==0] <- NA
# ---------------------------------- #


# Sample One Barangay Captain Candidate per Barangay
by_psgc <- group_by(data, PSGC)
set.seed(25001)
samp <- sample_n(by_psgc, 1)

# Confirm we have the same barangay as the incumbents analysis
samp <- samp[which(samp$PSGC %in% samp_inc$PSGC),]

# Sort
samp <- samp[order(samp$PSGC),]
samp_inc <- samp_inc[order(samp_inc$PSGC),]



# ----- McCrary Test ----- #
pdf(file="figures/MC2.pdf", width=8, heigh=6)
MC2 <- DCdensity(runvar=samp$MV.pb, cutpoint=0, bin = NULL, bw = NULL, verbose = FALSE, plot = TRUE, ext.out = TRUE)
MC2$th <- MC2$theta
title(xlab="Margin of Victory (Barangay Captain), 2010")
text(x=.4, y= 2, labels=paste0("P=",round(MC2$p, 3)), cex=2)
# text(x=.4, y= 2.5, labels=paste0(expression(theta),"="))
dev.off()

str(MC2)



# ----------------------------------- #
# ----- Non-Parametric Analyses ----- #
# ----------------------------------- #
# ----- DV 1: Number of running in t+1 / total number of candidates ----- #
RD2a <- RDestimate(formula= propRel ~ MV.pb, cutpoint=0, subset=NULL, data=samp, cluster=as.factor(samp$province), model=TRUE, bw=NULL)
summary(RD2a)

pdf("figures/RD2a.pdf", width=7.5, height=5)
	plot.RD(RD2a, ciCol="black", midCol="red", alpha=.1, range=c(-0.4,0.4))
	abline(v=0, col="black")
	title(main="Family Entry in Council Elections", ylab="Family Entry, Election t+1", xlab="Margin of Victory, Election t")
	rdpoints(y=samp$propRel, x=samp$MV.pb, data=samp, subset=NULL, c=0, cex=.6, col.bin="black")
dev.off()



# ----- DV 2: Family Vote Share in t+1 ----- #
RD2b <- RDestimate(formula= avgVS ~ MV.pb, cutpoint=0, subset=NULL, data=samp, cluster=as.factor(samp$province), model=TRUE, bw=NULL)
summary(RD2b)

pdf("figures/RD2b.pdf", width=7.5, height=5)
	plot.RD(RD2b, ciCol="black", midCol="red", alpha=.1, range=c(-0.4, 0.4))
	abline(v=0, col="black")
	title(main="Family Vote Share in Council Elections", ylab="Family Vote Share, Election t+1", xlab="Margin of Victory, Election t")
	rdpoints(y=samp$avgVS, x=samp$MV.pb, data=samp, subset=NULL, c=0, cex=.6, col.bin="black")
dev.off()


# ----- DV 3: Percent of Relatives Who Were Elected in t+1 ----- #
RD2c <- RDestimate(formula= percentRelWin ~ MV.pb, cutpoint=0, subset=NULL, data=samp, cluster=as.factor(samp$province), model=TRUE, bw=NULL)
summary(RD2c)

pdf("figures/RD2c.pdf", width=7.5, height=5)
	plot.RD(RD2c, ciCol="black", midCol="red", alpha=.1, range=c(-0.4,0.4))
	abline(v=0, col="black")
	title(main="Relatives in Council Seats", ylab="Percentage of Relative Candidates Winning Election t+1", xlab="Margin of Victory, Election t")
	rdpoints(y=samp$percentRelWin, x=samp$MV.pb, data=samp, subset=NULL, c=0, cex=.6, col.bin="black")
dev.off()




# ------------------------------ #
# ---- Parametric Analyses ----- #
# ------------------------------ #
samp$win10 <- 0
samp$win10[samp$rank.pb==1] <- 1

#library(multiwayvcov)

summary(m2a <- lm(data=samp[which(abs(samp$MV.pb)<=RD2a$bw[1]),], formula=propRel ~ MV.pb + win10))
	m2ases <-cluster.vcov(m2a, samp[which(abs(samp$MV.pb)<=RD2a$bw[1]),]$province)
	m2ases <- coeftest(m2a, m2ases)[,2]


	summary(m2aOLS <- lm(data=samp, formula=propRel ~ MV.pb + win10))
	m2aOLSses <-cluster.vcov(m2aOLS, samp$province)
	m2aOLSses <- coeftest(m2aOLS, m2aOLSses)[,2]

summary(m2b <-lm(data=samp[which(abs(samp$MV.pb)<=RD2b$bw[1]),], formula=avgVS ~ MV.pb + win10))
	m2bses <-cluster.vcov(m2b, samp[which(abs(samp$MV.pb)<=RD2b$bw[1]),]$province)
	m2bses <- coeftest(m2b, m2bses)[,2]


	summary(m2bOLS <-lm(data=samp, formula=avgVS ~ MV.pb + win10))
	m2bOLSses <-cluster.vcov(m2bOLS, samp$province)
	m2bOLSses <- coeftest(m2bOLS, m2bOLSses)[,2]

summary(m2c <-lm(data=samp[which(abs(samp$MV.pb)<=RD2c$bw[1]),], formula=percentRelWin ~ MV.pb + win10))
	m2cses <-cluster.vcov(m2c, samp[which(abs(samp$MV.pb)<=RD2c$bw[1]),]$province)
	m2cses <- coeftest(m2c, m2cses)[,2]

	summary(m2cOLS <-lm(data=samp, formula=percentRelWin ~ MV.pb + win10))
	m2cOLSses <-cluster.vcov(m2cOLS, samp$province)
	m2cOLSses <- coeftest(m2cOLS, m2cOLSses)[,2]


stargazer(m2aOLS, m2a, m2bOLS, m2b, m2cOLS, m2c, out="tables/ols2.tex", type="latex", digits=3, label="tab:ols2", dep.var.labels = c("Relatives/Candidates","Relative Vote Share","\\% Relatives Won"), covariate.labels=c("Margin of Victory, 2010", "Win, 2010","Constant"), title="Parametric analysis of the effect of incumbency on relative entry, vote share and winning. Standard errors are clustered by province. Uses rectangular kernel weights. Models 1, 3, and 5 use the entirity of the data, while 2, 4, and 6 use only points within the IK Bandwith. The coefficient of interest is `Win, 2010'.", add.lines=list(c("BW",1,round(RD2a$bw[1],3), 1 ,round(RD2b$bw[1],3), 1, round(RD2c$bw[1],3))), omit.stat=c("f","ser"), se=list(m2aOLSses,m2ases,m2bOLSses,m2bses,m2cOLSses,m2cses), float.env="table*", model.names=TRUE)


# ----- With Controls ----- #
samp$lnpop2010 <- log(samp$pop2010)
samp <- samp[is.finite(samp$lnpop2010), ]
samp$urbanRural[samp$urbanRural==levels(samp$urbanRural)[1]] <- NA
samp$urbanRural <- droplevels(samp$urbanRural)

#library(multiwayvcov)

summary(m2a <- lm(data=samp[which(abs(samp$MV.pb)<=RD2a$bw[1]),], formula=propRel ~ MV.pb + win10 + urbanRural + lnpop2010 ))
	m2ases <- cluster.vcov(m2a, as.matrix(samp[which(abs(samp$MV.pb)<=RD2a$bw[1]),]$province))
	m2ases <- coeftest(m2a, m2ases)[,2]


	summary(m2aOLS <- lm(data=samp, formula=propRel ~ MV.pb + win10 + urbanRural + lnpop2010))
	m2aOLSses <-cluster.vcov(m2aOLS, as.matrix(samp$province))
	m2aOLSses <- coeftest(m2aOLS, m2aOLSses)[,2]

summary(m2b <-lm(data=samp[which(abs(samp$MV.pb)<=RD2b$bw[1]),], formula=avgVS ~ MV.pb + win10 + urbanRural + lnpop2010))
	m2bses <-cluster.vcov(m2b, as.matrix(samp[which(abs(samp$MV.pb)<=RD2b$bw[1]),]$province))
	m2bses <- coeftest(m2b, m2bses)[,2]


	summary(m2bOLS <-lm(data=samp, formula=avgVS ~ MV.pb + win10 + urbanRural + lnpop2010))
	m2bOLSses <-cluster.vcov(m2bOLS, as.matrix(samp$province))
	m2bOLSses <- coeftest(m2bOLS, m2bOLSses)[,2]

summary(m2c <-lm(data=samp[which(abs(samp$MV.pb)<=RD2c$bw[1]),], formula=percentRelWin ~ MV.pb + win10 + urbanRural + lnpop2010))
	m2cses <-cluster.vcov(m2c, as.matrix(samp[which(abs(samp$MV.pb)<=RD2c$bw[1]),]$province))
	m2cses <- coeftest(m2c, m2cses)[,2]

	# t<-complete.cases(samp[,c("percentRelWin","MV.pb","win10","urbanRural","lnpop2010")])
	summary(m2cOLS <-lm(data=samp, formula=percentRelWin ~ MV.pb + win10 + urbanRural + lnpop2010))
	m2cOLSses <- cluster.vcov(m2cOLS, as.matrix(samp$province))
	m2cOLSses <- coeftest(m2cOLS, m2cOLSses)[,2]


stargazer(m2aOLS, m2a, m2bOLS, m2b, m2cOLS, m2c, out="tables/ols2_wControls.tex", type="latex", digits=3, label="tab:ols2_wControls", dep.var.labels = c("Relatives/Candidates","Relative Vote Share","\\% Relatives Won"), covariate.labels=c("Margin of Victory, 2010", "Win, 2010","Urban","Population (ln), 2010","Constant"), title="Parametric analysis of the effect of incumbency on relative entry, vote share and winning. Standard errors are clustered by province. Uses rectangular kernel weights. Models 1, 3, and 5 use the entirity of the data, while 2, 4, and 6 use only points within the IK Bandwith. The coefficient of interest is `Win, 2010'.", add.lines=list(c("BW",1,round(RD2a$bw[1],3), 1 ,round(RD2b$bw[1],3), 1, round(RD2c$bw[1],3))), omit.stat=c("f","ser"), se=list(m2aOLSses, m2ases, m2bOLSses, m2bses, m2cOLSses, m2cses), float.env="table*", model.names=TRUE)
















# -------------------------------------- #
# ----- PUBLICATION QUALITY TABLES ----- #
# -------------------------------------- #
# Functions
# library(ggplot2)
# library(grid)
# library(gridExtra)
# library(ggthemes)

xtRdd <- function(model, dv.label="DV:", lcol=TRUE){
	require(tables)
	ml <- lapply(model$model, function(x){summary(x)$coefficients})
	ml <- lapply(ml, function(x){x[2,]})
	ml <- lapply(ml, function(x){matrix(x, dimnames=list(c(),c(paste(dv.label))))})
	mall <- do.call("rbind",ml)
		mall <- data.frame(rep(c("Estimate","Std. Error","t value","Pr(>|t|)"),3), mall)
		mall <-  cbind(c("100% BW","","","","50% BW","","","","200% BW","","",""),mall)

		colnames(mall) <- NULL;rownames(mall) <- NULL

# Add Stars
		pvals <- unlist(mall[c(4,8,12),3])
		mall <- cbind(mall, sig=rep(NA,length(mall[,1])))
		mall[c(1,5,9),4] <- symnum(pvals, corr = FALSE, cutpoints = c(0, .001,.01, .05, .1, 1), symbols = c("***","**","*","."," "))



	# Add BW
		mall[c(3,7,11),3] <- (model$bw)
			mall[,2] <- as.character(mall[,2])
			mall[c(3,7,11),2] <- "Bandwidth"
	# Add N
		mall[c(4,8,12),3] <- model$obs
			mall[c(4,8,12),2] <- "Observations"
	# Round Numeric Column
		mall[,3] <- round(mall[,3],3)

	# Prettyfy SEs
		mall[c(2,6,10),3]<- paste0("(",mall[c(2,6,10),3],")")

	# Column Names
		# colnames(mall) <- NULL
		# colnames(mall)[length(colnames(mall))] <- dv.label

	# Print with line breaks
	if(lcol==TRUE){
		mall <- mall # , align="l|lrrl", type="html")
	}
	else{mall <- mall[,-c(1:2)]} # , align="|rrl", type="html")

	return(mall)
}

a <- cbind(xtRdd(model=RD2a), xtRdd(model=RD2b, lcol=FALSE), xtRdd(model=RD2c, lcol=FALSE))
colnames(a) <- NULL
addtorow <- list()
	addtorow$pos <- list(-1,2,6,10)
	addtorow$command <- c("\\multicolumn{2}{c}{} & \\multicolumn{2}{c}{Running for Office} & \\multicolumn{2}{c}{Family Vote Share} & \\multicolumn{2}{c}{\\% Relatives Won} \\\\", "& & &  & & & & \\\\", "& & &  & & & & \\\\", "& & &  & & & & \\\\")

print.xtable(xtable(a, label="tab:rd2", digits=3, align="lll|rl|rl|rl", caption="Regression discontinuity results for relative incumbency advantage. Local linear regressions are performed to either side of the cutpoint using triangular kernels. Results at 100\\%, 50\\% and 200\\% of the Imbens-Kalyanaraman optimal bandwidths are displayed. Standard errors are clustered by province. * denotes significance at the 10\\%, ** at the 5\\% and, *** at the 1\\% level."), digits = c(0, rep(3, ncol(a))), hline.after=c(0,4,8,12), include.rownames = FALSE, include.colnames = FALSE, type="latex", floating.environment="table*", file="tables/RD2.tex", add.to.row=addtorow)

# \multicolumn{20}{l}{Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1}





# ---------------------------- #
# ----- Coefficient Plot ----- #
# ---------------------------- #
samp2 <- samp
samp2$propRel <- scale(samp2$propRel, center=TRUE, scale=TRUE)
samp2$avgVS <- scale(samp2$avgVS, center=TRUE, scale=TRUE)
samp2$percentRelWin <- scale(samp2$percentRelWin, center=TRUE, scale=TRUE)

# DV = Candidate Selection (number of relatives running in 2013) [SAMPLE]
RD2a <- RDestimate(formula= propRel ~ MV.pb, cutpoint=0, subset=NULL, data=samp2, cluster=as.factor(samp$province), model=TRUE, bw=NULL)
summary(RD2a)
plot(RD2a)

# DV = Family Vote Share [SAMPLE]
RD2b <- RDestimate(formula= famVS ~ MV.pb, cutpoint=0, subset=NULL, data=samp2, cluster=as.factor(samp$province), model=TRUE, bw=NULL)
RD2b <- RDestimate(formula= avgVS ~ MV.pb, cutpoint=0, subset=NULL, data=samp2, cluster=as.factor(samp$province), model=TRUE, bw=NULL)
summary(RD2b)
plot(RD2b)

# DV = Proportion Seats to Relatives [SAMPLE]
RD2c <- RDestimate(formula= percentRelWin ~ MV.pb, cutpoint=0, subset=NULL, data=samp2, cluster=as.factor(samp$province), model=TRUE, bw=NULL)
summary(RD2c)
plot(RD2c)


# ----------------------------- #
# ------ Coefficient Plot ----- #
# ----------------------------- #

# library(ggplot2)
# library(grid)
# library(gridExtra)
# library(ggthemes)

getcoef <- function(x){x$model[[1]]$coefficients["Tr"]}
getses <- function(x){summary(x$model[[1]])$coefficients[,2][2]}
getses <- function(x){summary(x)$coefficients[1, "Std. Error"]}

ms <- list(RD2a, RD2b, RD2c)
cs <- unlist(lapply(ms, getcoef))
ses <- unlist(lapply(ms, getses))
upr <- cs + (1.96*ses)
lwr <- cs - (1.96*ses)
dat <- data.frame(cs=as.numeric(cs), upr=as.numeric(upr), lwr=as.numeric(lwr), group=as.factor(1:length(cs))) #
# dat$group <- as.factor(dat$group)
rownames(dat) <- NULL
dat <- data.frame(dat, type=factor(x=c("Running for Office","Vote Share","Election"), levels=c("Running for Office","Vote Share","Election")))

ap <- ggplot(dat, aes(y=cs, x=type, colour=type)) + geom_hline(x=0, col="red", alpha=.5) + geom_point(stat="identity") + geom_errorbar(aes(ymax=upr, ymin=lwr), position=position_dodge(0.9), width=.5) + theme_pander() + theme(legend.position="none")+ ylab("Standard Deviations") + xlab("") + theme(axis.text=element_text(size=20), axis.title.y = element_text(size=20), plot.title = element_text(size=20)) + labs(title="Relatives")

pdf("figures/rdd2Coefficients.pdf", width=8, height=5)
	ap + scale_y_continuous(limits = c(-.2,.6))
dev.off()



